import pandas as pd
import numpy as np
import plotly
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
### Load vaccination data. ###
url_vaccination = 'https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations.csv'
# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in pandas 2.0.
# `on_bad_lines='warn'` (pandas >= 1.3) keeps the same behaviour: malformed rows
# are dropped and a warning is emitted instead of aborting the load.
owid_vaccination = pd.read_csv(url_vaccination, on_bad_lines='warn', parse_dates=['date'])
owid_vaccination.head()
# Get list of countries in the Our World In Data (owid) vaccination dataset.
owid_vaccination_countries = list(owid_vaccination.location.unique())
### Load owid dataset that has population by country as of 2020. ###
url_population = 'https://raw.githubusercontent.com/owid/covid-19-data/master/scripts/input/un/population_2020.csv'
owid_population = pd.read_csv(url_population, on_bad_lines='warn')
owid_population.head()
# Rename the key column so it matches the vaccination dataframe for the later merge.
owid_population = owid_population.rename(columns={"entity": "location"})
# Get list of countries in the Our World In Data (owid) population dataset.
owid_population_countries = list(owid_population.location.unique())
### Read the workbook that maps each country to its Fiscal Space country group. ###
path = '/Users/victoriadequadros/Dropbox/debt_exposure_covid/auxiliary_files/'
file_name = 'country_mapping.xlsx'
sheet = 'Fiscal Space and OWID'  # a sheet name, a sheet number, or a list of either
# Only spreadsheet columns B and C are read.
country_map = pd.read_excel(path + file_name, sheet_name=sheet, usecols="B,C")
country_map.head(5)
# Switch to the column names used by the OWID dataframes so the tables can be
# merged on 'location'.
country_map = country_map.rename(
    columns={
        "Country group": "country_group",
        "Country name in Our World in Data": "location",
    }
)
# Countries covered by the mapping (Fiscal Space dataset + Taiwan).
fs_countries = country_map['location'].tolist()
# Coverage checks between the country lists. Each result holds the sorted,
# unique names present in the first list but absent from the second.
# In OWID vaccination, missing from FS:
vacc_not_in_fs = np.setdiff1d(owid_vaccination_countries, fs_countries)
# In OWID population, missing from FS:
pop_not_in_fs = np.setdiff1d(owid_population_countries, fs_countries)
# In FS, missing from OWID vaccination:
not_in_vacc = np.setdiff1d(fs_countries, owid_vaccination_countries)
# Attach 'country_group' and 'population' to every vaccination row.
# Both joins are left joins, so only keys from the vaccination dataframe are kept.
owid_merged1 = pd.merge(owid_vaccination, country_map, how='left', on='location')
population_by_location = owid_population[['location', 'population']]
owid_merged = pd.merge(owid_merged1, population_by_location, how='left', on='location')
# Aggregate to one row per (country_group, date): sum total vaccinations and both
# daily-vaccination series over the countries of each group.
# This country-group dataset is simply called `owid`.
owid = (
    owid_merged
    .groupby(['country_group', 'date'])
    .agg({'total_vaccinations': 'sum', 'daily_vaccinations_raw': 'sum', 'daily_vaccinations': 'sum'})
    .reset_index()
)
# Group population: every country of the group that appears in the vaccination
# data is counted exactly once. Summing 'population' per (group, date) instead
# would only include the countries that happen to report on that date, so the
# denominator would move from day to day and distort the per-hundred series.
group_population = (
    owid_merged
    .drop_duplicates(subset='location')
    .groupby('country_group')['population']
    .sum()
)
owid['population'] = owid['country_group'].map(group_population)
# Cumulative sum of daily vaccinations within each group. The groupby above
# returns rows sorted by (country_group, date), so the running total follows time.
owid['cum_daily_vacc'] = owid.groupby(['country_group'])['daily_vaccinations'].cumsum()
# Total vaccinations per hundred people of the group's population.
owid['total_vaccinations_per_hundred'] = (owid['cum_daily_vacc'] / owid['population']) * 100
# Drop obs before 15th december 2020. This is because only Advanced Economies had started.
# The cumulative sum above is computed first, so earlier doses stay in the running total.
owid = owid[~(owid['date'] < '2020-12-15')]
# Prepare data for plotting: index by date so each series carries its own x values.
owid = owid.set_index('date')
# Create separate dataframes, one per country group.
owid_advanced = owid[owid.country_group == "Advanced Economies"]
owid_emde = owid[owid.country_group == "EMDEs"]
# Optional 7-day moving average of total vaccinations per hundred (currently disabled).
#owid_advanced['ma_total_vaccinations_per_hundred'] = owid_advanced['total_vaccinations_per_hundred'].rolling(7).mean()
#owid_emde['ma_total_vaccinations_per_hundred'] = owid_emde['total_vaccinations_per_hundred'].rolling(7).mean()
# Each group is plotted against its own dates. Reusing the Advanced Economies
# dates for the EMDE series would fail (or misalign the points) whenever the two
# groups do not cover exactly the same set of dates.
x = np.array(owid_advanced.index.values)
x_emde = np.array(owid_emde.index.values)
y1 = np.array(owid_advanced['total_vaccinations_per_hundred'])
y2 = np.array(owid_emde['total_vaccinations_per_hundred'])
plt.plot(x, y1, "-b", label="Advanced Economies")
plt.plot(x_emde, y2, "-r", label="EMDEs")
plt.legend(loc="upper left")
plt.ylim(0, 140)
plt.title("Total vaccinations per hundred")
# Save before show().
plt.savefig("total_vaccinations_per_hundred.png")
plt.show()
# Display the group-level table, latest dates first (the result is not stored).
owid.sort_values(by = ['country_group', 'date', 'total_vaccinations_per_hundred'], ascending = False)
| country_group | total_vaccinations | daily_vaccinations_raw | daily_vaccinations | population | cum_daily_vacc | total_vaccinations_per_hundred | |
|---|---|---|---|---|---|---|---|
| date | |||||||
| 2021-07-19 | EMDEs | 2.447416e+09 | 14629709.0 | 22247770.0 | 4.674605e+09 | 2.624386e+09 | 56.141346 |
| 2021-07-18 | EMDEs | 2.059421e+09 | 13331190.0 | 24076770.0 | 5.196730e+09 | 2.602138e+09 | 50.072614 |
| 2021-07-17 | EMDEs | 2.516087e+09 | 21824636.0 | 24332350.0 | 5.549480e+09 | 2.578062e+09 | 46.455916 |
| 2021-07-16 | EMDEs | 2.453281e+09 | 24689460.0 | 23674745.0 | 5.586865e+09 | 2.553729e+09 | 45.709525 |
| 2021-07-15 | EMDEs | 2.509761e+09 | 27286272.0 | 23368678.0 | 5.609536e+09 | 2.530055e+09 | 45.102740 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 2020-12-19 | Advanced Economies | 1.196500e+04 | 721.0 | 86497.0 | 1.273833e+08 | 5.145020e+05 | 0.403901 |
| 2020-12-18 | Advanced Economies | 1.118400e+04 | 3981.0 | 86911.0 | 1.187278e+08 | 4.280050e+05 | 0.360493 |
| 2020-12-17 | Advanced Economies | 7.203000e+03 | 4179.0 | 86516.0 | 1.187278e+08 | 3.410940e+05 | 0.287291 |
| 2020-12-16 | Advanced Economies | 3.025000e+03 | 2300.0 | 85626.0 | 1.129356e+08 | 2.545780e+05 | 0.225419 |
| 2020-12-15 | Advanced Economies | 7.230000e+02 | 718.0 | 84835.0 | 1.129356e+08 | 1.689520e+05 | 0.149600 |
434 rows × 7 columns
import plotly.graph_objects as go

# Interactive version of the "total vaccinations per hundred" chart.
# Filter each country group once and plot it against its own date index, so the
# two traces stay correct even if the groups do not cover the same dates.
advanced_rows = owid[owid['country_group'] == 'Advanced Economies']
emde_rows = owid[owid['country_group'] == 'EMDEs']
X = advanced_rows.index  # kept under its original name in case later cells refer to it
fig = go.Figure()
fig.add_trace(go.Scatter(x=X,
                         y=advanced_rows['total_vaccinations_per_hundred'],
                         name='Advanced'))
fig.add_trace(go.Scatter(x=emde_rows.index,
                         y=emde_rows['total_vaccinations_per_hundred'],
                         name='EMDEs'))
fig.update_layout(title='Total vaccinations per hundred',
                  template='plotly_white',
                  xaxis_title='Date',
                  yaxis_title='')
fig.show()
# Just to explore which countries were leading the uptick in vaccinations.
# Order rows chronologically within each country so the running totals and the
# day-over-day differences below follow time.
owid_merged = owid_merged.sort_values(by = ['location', 'date'])
# Cumulative sum of daily vaccinations, computed separately for each country.
# A plain cumsum over the whole frame keeps accumulating across countries and
# produces per-hundred values in the hundreds of thousands.
owid_merged['cum_daily_vacc'] = owid_merged.groupby(['location'])['daily_vaccinations'].cumsum()
# Total vaccinations per hundred, based on the running total above.
# NOTE: this overwrites the column of the same name that came with the OWID file.
owid_merged['total_vaccinations_per_hundred'] = (owid_merged['cum_daily_vacc']/owid_merged['population'])*(100)
owid_merged.head()
# Day-over-day change in vaccinations per hundred within each country.
owid_merged['daily_diff_vacc_per_hundred'] = owid_merged.groupby(['location'])['total_vaccinations_per_hundred'].diff()
owid_merged
# Rank by country group, then latest date, then largest daily increase.
owid_merged = owid_merged.sort_values(by = ['country_group', 'date', 'daily_diff_vacc_per_hundred'], ascending = False)
owid_merged[owid_merged['country_group']=='EMDEs']
| location | iso_code | date | total_vaccinations | people_vaccinated | people_fully_vaccinated | daily_vaccinations_raw | daily_vaccinations | total_vaccinations_per_hundred | people_vaccinated_per_hundred | people_fully_vaccinated_per_hundred | daily_vaccinations_per_million | country_group | population | cum_daily_vacc | daily_diff_vacc_per_hundred | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 19843 | Mauritius | MUS | 2021-07-19 | 1040903.0 | 608103.0 | 432800.0 | NaN | 19910.0 | 609319.604377 | 47.82 | 34.03 | 15655.0 | EMDEs | 1.271767e+06 | 7.749126e+09 | 1.565538 |
| 29845 | Sri Lanka | LKA | 2021-07-19 | 7361893.0 | 5675329.0 | 1686564.0 | 114910.0 | 274790.0 | 41390.419292 | 26.50 | 7.88 | 12833.0 | EMDEs | 2.141325e+07 | 8.863034e+09 | 1.283271 |
| 19096 | Malaysia | MYS | 2021-07-19 | 14772221.0 | 10097841.0 | 4674380.0 | 424936.0 | 411316.0 | 23934.005458 | 31.20 | 14.44 | 12708.0 | EMDEs | 3.236600e+07 | 7.746480e+09 | 1.270827 |
| 27145 | Saudi Arabia | SAU | 2021-07-19 | 22870428.0 | 18441821.0 | 4428607.0 | 223939.0 | 377528.0 | 24492.242103 | 52.97 | 12.72 | 10844.0 | EMDEs | 3.481387e+07 | 8.526697e+09 | 1.084418 |
| 33258 | Uruguay | URY | 2021-07-19 | 4543194.0 | 2472903.0 | 2070291.0 | 30372.0 | 31165.0 | 325489.990348 | 71.19 | 59.60 | 8972.0 | EMDEs | 3.473727e+06 | 1.130663e+10 | 0.897163 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 25755 | Russia | RUS | 2020-12-17 | NaN | NaN | NaN | NaN | 3357.0 | 5792.129465 | NaN | NaN | 23.0 | EMDEs | 1.459345e+08 | 8.452713e+09 | 0.002300 |
| 6116 | China | CHN | 2020-12-16 | NaN | NaN | NaN | NaN | 187500.0 | 181.298886 | NaN | NaN | 130.0 | EMDEs | 1.439324e+09 | 2.609478e+09 | NaN |
| 25754 | Russia | RUS | 2020-12-16 | NaN | NaN | NaN | NaN | 3357.0 | 5792.127164 | NaN | NaN | 23.0 | EMDEs | 1.459345e+08 | 8.452710e+09 | NaN |
| 6115 | China | CHN | 2020-12-15 | 1500000.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | EMDEs | 1.439324e+09 | NaN | NaN |
| 25753 | Russia | RUS | 2020-12-15 | 28500.0 | 28500.0 | NaN | NaN | NaN | NaN | 0.02 | NaN | NaN | EMDEs | 1.459345e+08 | NaN | NaN |
20949 rows × 16 columns
# Initialise plotly's offline notebook mode.
# NOTE(review): this call comes after fig.show() earlier in the file — confirm
# whether it should run before the first figure is displayed.
plotly.offline.init_notebook_mode()